{
  "cells": [
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "cHgpJN1uzD8B"
      },
      "source": [
        "# Tutorial on instruction tuning of Chinese-Alpaca-7B\n",
        "\n",
        "More info: https://github.com/ymcui/Chinese-LLaMA-Alpaca"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "2meQHBlHxcsi"
      },
      "source": [
        "## Install Dependencies"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "quRXOPaZwmwz",
        "outputId": "0fadc49a-b1a0-4131-9997-58c6c1a76fc4"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting transformers==4.28.1\n",
            "  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.0/7.0 MB\u001b[0m \u001b[31m53.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (3.12.0)\n",
            "Collecting huggingface-hub<1.0,>=0.11.0 (from transformers==4.28.1)\n",
            "  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.5/224.5 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (1.22.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (23.1)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (6.0)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (2022.10.31)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (2.27.1)\n",
            "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.1)\n",
            "  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m98.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.28.1) (4.65.0)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers==4.28.1) (2023.4.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers==4.28.1) (4.5.0)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.28.1) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.28.1) (2022.12.7)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.28.1) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.28.1) (3.4)\n",
            "Installing collected packages: tokenizers, huggingface-hub, transformers\n",
            "Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting git+https://github.com/huggingface/peft.git@13e53fc\n",
            "  Cloning https://github.com/huggingface/peft.git (to revision 13e53fc) to /tmp/pip-req-build-ccgd0ls9\n",
            "  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-req-build-ccgd0ls9\n",
            "\u001b[33m  WARNING: Did not find branch or tag '13e53fc', assuming revision or ref.\u001b[0m\u001b[33m\n",
            "\u001b[0m  Running command git checkout -q 13e53fc\n",
            "  Resolved https://github.com/huggingface/peft.git to commit 13e53fc\n",
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft==0.3.0.dev0) (1.22.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.3.0.dev0) (23.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft==0.3.0.dev0) (5.9.5)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft==0.3.0.dev0) (6.0)\n",
            "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.3.0.dev0) (2.0.0+cu118)\n",
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft==0.3.0.dev0) (4.28.1)\n",
            "Collecting accelerate (from peft==0.3.0.dev0)\n",
            "  Downloading accelerate-0.19.0-py3-none-any.whl (219 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m219.1/219.1 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.3.0.dev0) (3.12.0)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.3.0.dev0) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.3.0.dev0) (1.11.1)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.3.0.dev0) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.3.0.dev0) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.3.0.dev0) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.3.0.dev0) (3.25.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.3.0.dev0) (16.0.3)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.3.0.dev0) (0.14.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.3.0.dev0) (2022.10.31)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.3.0.dev0) (2.27.1)\n",
            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.3.0.dev0) (0.13.3)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.3.0.dev0) (4.65.0)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers->peft==0.3.0.dev0) (2023.4.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft==0.3.0.dev0) (2.1.2)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.3.0.dev0) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.3.0.dev0) (2022.12.7)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.3.0.dev0) (2.0.12)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft==0.3.0.dev0) (3.4)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft==0.3.0.dev0) (1.3.0)\n",
            "Building wheels for collected packages: peft\n",
            "  Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for peft: filename=peft-0.3.0.dev0-py3-none-any.whl size=40652 sha256=be80ba07bd8ab5299c123251a644846225e6b44e69e49a85212b0acf8ed05f59\n",
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-9iaw_m7j/wheels/d9/13/c6/404d5f8a81c5620f65f7fd75b6a66619f013cd79c2875b981c\n",
            "Successfully built peft\n",
            "Installing collected packages: accelerate, peft\n",
            "Successfully installed accelerate-0.19.0 peft-0.3.0.dev0\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting datasets\n",
            "  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m474.6/474.6 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.22.4)\n",
            "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n",
            "Collecting dill<0.3.7,>=0.3.0 (from datasets)\n",
            "  Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.27.1)\n",
            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.65.0)\n",
            "Collecting xxhash (from datasets)\n",
            "  Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting multiprocess (from datasets)\n",
            "  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.4.0)\n",
            "Collecting aiohttp (from datasets)\n",
            "  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.14.1)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n",
            "Collecting responses<0.19 (from datasets)\n",
            "  Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
            "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.0.12)\n",
            "Collecting multidict<7.0,>=4.5 (from aiohttp->datasets)\n",
            "  Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting async-timeout<5.0,>=4.0.0a3 (from aiohttp->datasets)\n",
            "  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n",
            "Collecting yarl<2.0,>=1.0 (from aiohttp->datasets)\n",
            "  Downloading yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (268 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m31.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting frozenlist>=1.1.1 (from aiohttp->datasets)\n",
            "  Downloading frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (149 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting aiosignal>=1.1.2 (from aiohttp->datasets)\n",
            "  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (3.12.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (4.5.0)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (1.26.15)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n",
            "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2022.7.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n",
            "Installing collected packages: xxhash, multidict, frozenlist, dill, async-timeout, yarl, responses, multiprocess, aiosignal, aiohttp, datasets\n",
            "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.12.0 dill-0.3.6 frozenlist-1.3.3 multidict-6.0.4 multiprocess-0.70.14 responses-0.18.0 xxhash-3.2.0 yarl-1.9.2\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting sentencepiece\n",
            "  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: sentencepiece\n",
            "Successfully installed sentencepiece-0.1.99\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting deepspeed\n",
            "  Downloading deepspeed-0.9.2.tar.gz (779 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m779.3/779.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting hjson (from deepspeed)\n",
            "  Downloading hjson-3.1.0-py3-none-any.whl (54 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting ninja (from deepspeed)\n",
            "  Downloading ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (145 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m146.0/146.0 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.22.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (23.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from deepspeed) (5.9.5)\n",
            "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from deepspeed) (9.0.0)\n",
            "Requirement already satisfied: pydantic<2.0.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.10.7)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.0.0+cu118)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from deepspeed) (4.65.0)\n",
            "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<2.0.0->deepspeed) (4.5.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.12.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (1.11.1)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->deepspeed) (3.25.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->deepspeed) (16.0.3)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->deepspeed) (2.1.2)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->deepspeed) (1.3.0)\n",
            "Building wheels for collected packages: deepspeed\n",
            "  Building wheel for deepspeed (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for deepspeed: filename=deepspeed-0.9.2-py3-none-any.whl size=811220 sha256=00fc399e490063552f9d2be31b65ad7c35f72a82750be6620340046283b26149\n",
            "  Stored in directory: /root/.cache/pip/wheels/a6/d2/b1/b15210b5dc024bab4eccbac2148db29959fe01fe6042557d07\n",
            "Successfully built deepspeed\n",
            "Installing collected packages: ninja, hjson, deepspeed\n",
            "Successfully installed deepspeed-0.9.2 hjson-3.1.0 ninja-1.11.1\n"
          ]
        }
      ],
      "source": [
        "# Every dependency is pinned to the version recorded in this cell's saved output\n",
        "# (datasets 2.12.0, sentencepiece 0.1.99, deepspeed 0.9.2) so that a fresh runtime\n",
        "# reproduces the environment this tutorial was run with.\n",
        "# %pip installs into the environment of the running kernel.\n",
        "%pip install transformers==4.28.1\n",
        "%pip install git+https://github.com/huggingface/peft.git@13e53fc\n",
        "%pip install datasets==2.12.0\n",
        "%pip install sentencepiece==0.1.99\n",
        "%pip install deepspeed==0.9.2"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "ji21WFqexASI"
      },
      "source": [
        "## Clone our repository\n",
        "\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dulrlPMexFNN",
        "outputId": "c24273ec-d313-40c2-c0d9-1495524c84db"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Cloning into 'Chinese-LLaMA-Alpaca'...\n",
            "remote: Enumerating objects: 889, done.\u001b[K\n",
            "remote: Counting objects: 100% (330/330), done.\u001b[K\n",
            "remote: Compressing objects: 100% (220/220), done.\u001b[K\n",
            "remote: Total 889 (delta 127), reused 190 (delta 108), pack-reused 559\u001b[K\n",
            "Receiving objects: 100% (889/889), 11.28 MiB | 26.30 MiB/s, done.\n",
            "Resolving deltas: 100% (517/517), done.\n"
          ]
        }
      ],
      "source": [
        "# Clone only when the checkout is missing, so re-running this cell does not fail\n",
        "# because the destination directory already exists.\n",
        "!test -d Chinese-LLaMA-Alpaca || git clone https://github.com/ymcui/Chinese-LLaMA-Alpaca.git"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "pqH_h_ZAz4_e"
      },
      "source": [
        "## Instruction tuning for Alpaca-7B\n",
        "\n",
        "This follows the setting in https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/SFT-Script, except that, to simplify the tutorial, we:\n",
        "- continue training the Chinese-Alpaca-LoRA\n",
        "- only train 100 steps\n",
        "- omit validation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "!mkdir Chinese-LLaMA-Alpaca/sft_data\n",
        "!cp Chinese-LLaMA-Alpaca/data/alpaca_data_zh_51k.json Chinese-LLaMA-Alpaca/sft_data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IhrAVNUKSw9_",
        "outputId": "791aa99b-ab3d-4e0b-f001-429dbc56dc4d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "2023-05-12 05:07:23.549181: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
            "[2023-05-12 05:07:24,469] [INFO] [comm.py:622:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n",
            "05/12/2023 05:07:26 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True\n",
            "[INFO|configuration_utils.py:668] 2023-05-12 05:07:27,030 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--decapoda-research--llama-7b-hf/snapshots/5f98eefcc80e437ef68d457ad7bf167c2c6a1348/config.json\n",
            "[INFO|configuration_utils.py:720] 2023-05-12 05:07:27,031 >> Model config LlamaConfig {\n",
            "  \"_name_or_path\": \"decapoda-research/llama-7b-hf\",\n",
            "  \"architectures\": [\n",
            "    \"LLaMAForCausalLM\"\n",
            "  ],\n",
            "  \"bos_token_id\": 0,\n",
            "  \"eos_token_id\": 1,\n",
            "  \"hidden_act\": \"silu\",\n",
            "  \"hidden_size\": 4096,\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 11008,\n",
            "  \"max_position_embeddings\": 2048,\n",
            "  \"max_sequence_length\": 2048,\n",
            "  \"model_type\": \"llama\",\n",
            "  \"num_attention_heads\": 32,\n",
            "  \"num_hidden_layers\": 32,\n",
            "  \"pad_token_id\": -1,\n",
            "  \"rms_norm_eps\": 1e-06,\n",
            "  \"tie_word_embeddings\": false,\n",
            "  \"torch_dtype\": \"float16\",\n",
            "  \"transformers_version\": \"4.28.1\",\n",
            "  \"use_cache\": true,\n",
            "  \"vocab_size\": 32000\n",
            "}\n",
            "\n",
            "[INFO|tokenization_utils_base.py:1809] 2023-05-12 05:07:27,074 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--ziqingyang--chinese-alpaca-lora-7b/snapshots/89dd628daa79cae8b930f2a7066006c39d6ac454/tokenizer.model\n",
            "[INFO|tokenization_utils_base.py:1809] 2023-05-12 05:07:27,074 >> loading file added_tokens.json from cache at None\n",
            "[INFO|tokenization_utils_base.py:1809] 2023-05-12 05:07:27,074 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--ziqingyang--chinese-alpaca-lora-7b/snapshots/89dd628daa79cae8b930f2a7066006c39d6ac454/special_tokens_map.json\n",
            "[INFO|tokenization_utils_base.py:1809] 2023-05-12 05:07:27,074 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--ziqingyang--chinese-alpaca-lora-7b/snapshots/89dd628daa79cae8b930f2a7066006c39d6ac454/tokenizer_config.json\n",
            "05/12/2023 05:07:27 - INFO - __main__ - training files: /content/Chinese-LLaMA-Alpaca/data/alpaca_data_zh_51k.json\n",
            "05/12/2023 05:07:27 - WARNING - root - building dataset...\n",
            "05/12/2023 05:07:27 - INFO - __name__ - training datasets-/content/Chinese-LLaMA-Alpaca/data/alpaca_data_zh_51k.json has been loaded from disk\n",
            "05/12/2023 05:07:27 - INFO - __main__ - Num train_samples  51179\n",
            "05/12/2023 05:07:27 - INFO - __main__ - training example:\n",
            "05/12/2023 05:07:27 - INFO - __main__ - <s> Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
            "\n",
            "### Instruction:\n",
            "我们如何在日常生活中减少用水？\n",
            "\n",
            "### Response:  1. 使用节水装置，如节水淋浴喷头和水龙头。 \n",
            "2. 使用水箱或水桶收集家庭废水，例如洗碗和洗浴。 \n",
            "3. 在社区中提高节水意识。 \n",
            "4. 检查水管和灌溉系统的漏水情况，并及时修复它们。 \n",
            "5. 洗澡时间缩短，使用低流量淋浴头节约用水。 \n",
            "6. 收集雨水，用于园艺或其他非饮用目的。 \n",
            "7. 刷牙或擦手时关掉水龙头。 \n",
            "8. 减少浇水草坪的时间。 \n",
            "9. 尽可能多地重复使用灰水（来自洗衣机、浴室水槽和淋浴的水）。 \n",
            "10. 只购买能源效率高的洗碗机和洗衣机。</s>\n",
            "[INFO|modeling_utils.py:2534] 2023-05-12 05:07:27,107 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--decapoda-research--llama-7b-hf/snapshots/5f98eefcc80e437ef68d457ad7bf167c2c6a1348/pytorch_model.bin.index.json\n",
            "[INFO|modeling_utils.py:1176] 2023-05-12 05:07:27,110 >> Instantiating LlamaForCausalLM model under default dtype torch.float16.\n",
            "[INFO|configuration_utils.py:575] 2023-05-12 05:07:27,110 >> Generate config GenerationConfig {\n",
            "  \"_from_model_config\": true,\n",
            "  \"bos_token_id\": 0,\n",
            "  \"eos_token_id\": 1,\n",
            "  \"pad_token_id\": -1,\n",
            "  \"transformers_version\": \"4.28.1\"\n",
            "}\n",
            "\n",
            "Loading checkpoint shards: 100% 33/33 [00:13<00:00,  2.46it/s]\n",
            "[INFO|modeling_utils.py:3190] 2023-05-12 05:07:40,719 >> All model checkpoint weights were used when initializing LlamaForCausalLM.\n",
            "\n",
            "[INFO|modeling_utils.py:3198] 2023-05-12 05:07:40,719 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at decapoda-research/llama-7b-hf.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.\n",
            "[INFO|configuration_utils.py:537] 2023-05-12 05:07:40,762 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--decapoda-research--llama-7b-hf/snapshots/5f98eefcc80e437ef68d457ad7bf167c2c6a1348/generation_config.json\n",
            "[INFO|configuration_utils.py:575] 2023-05-12 05:07:40,762 >> Generate config GenerationConfig {\n",
            "  \"_from_model_config\": true,\n",
            "  \"bos_token_id\": 0,\n",
            "  \"eos_token_id\": 1,\n",
            "  \"pad_token_id\": 0,\n",
            "  \"transformers_version\": \"4.28.1\"\n",
            "}\n",
            "\n",
            "len(tokenizer):49954\n",
            "05/12/2023 05:08:12 - INFO - __main__ - Peft from pre-trained model\n",
            "trainable params: 429211648 || all params: 6905483264 || trainable%: 6.215519342977586\n",
            "model.modules_to_save: ['embed_tokens', 'lm_head']\n",
            "[INFO|trainer.py:564] 2023-05-12 05:09:45,537 >> max_steps is given, it will override any value given in num_train_epochs\n",
            "[INFO|trainer.py:621] 2023-05-12 05:09:45,537 >> Using cuda_amp half precision backend\n",
            "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
            "  warnings.warn(\n",
            "[2023-05-12 05:09:45,560] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.2, git-hash=unknown, git-branch=unknown\n",
            "05/12/2023 05:09:49 - INFO - torch.distributed.distributed_c10d - Added key: store_based_barrier_key:2 to store for rank: 0\n",
            "05/12/2023 05:09:49 - INFO - torch.distributed.distributed_c10d - Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 1 nodes.\n",
            "[2023-05-12 05:09:49,272] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False\n",
            "[2023-05-12 05:09:49,272] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer\n",
            "[2023-05-12 05:09:49,272] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer\n",
            "[2023-05-12 05:09:49,316] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = AdamW\n",
            "[2023-05-12 05:09:49,316] [INFO] [utils.py:54:is_zero_supported_optimizer] Checking ZeRO support for optimizer=AdamW type=<class 'transformers.optimization.AdamW'>\n",
            "[2023-05-12 05:09:49,316] [WARNING] [engine.py:1104:_do_optimizer_sanity_check] **** You are using ZeRO with an untested optimizer, proceed with caution *****\n",
            "[2023-05-12 05:09:49,317] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer\n",
            "[2023-05-12 05:09:49,317] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 100000000\n",
            "[2023-05-12 05:09:49,317] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 100000000\n",
            "[2023-05-12 05:09:49,317] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False\n",
            "[2023-05-12 05:09:49,317] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False\n",
            "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n",
            "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/utils/build.ninja...\n",
            "Building extension module utils...\n",
            "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
            "ninja: no work to do.\n",
            "Loading extension module utils...\n",
            "Time to load utils op: 0.0975944995880127 seconds\n",
            "Rank: 0 partition count [1] and sizes[(429211648, False)] \n",
            "[2023-05-12 05:09:51,439] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states\n",
            "[2023-05-12 05:09:51,440] [INFO] [utils.py:786:see_memory_usage] MA 14.49 GB         Max_MA 15.29 GB         CA 15.73 GB         Max_CA 16 GB \n",
            "[2023-05-12 05:09:51,440] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 4.29 GB, percent = 5.1%\n",
            "[2023-05-12 05:09:51,639] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states\n",
            "[2023-05-12 05:09:51,640] [INFO] [utils.py:786:see_memory_usage] MA 17.69 GB         Max_MA 20.89 GB         CA 22.12 GB         Max_CA 22 GB \n",
            "[2023-05-12 05:09:51,640] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 4.3 GB, percent = 5.1%\n",
            "[2023-05-12 05:09:51,641] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized\n",
            "[2023-05-12 05:09:51,832] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer\n",
            "[2023-05-12 05:09:51,832] [INFO] [utils.py:786:see_memory_usage] MA 17.69 GB         Max_MA 17.69 GB         CA 22.12 GB         Max_CA 22 GB \n",
            "[2023-05-12 05:09:51,833] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 4.29 GB, percent = 5.1%\n",
            "[2023-05-12 05:09:51,846] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = AdamW\n",
            "[2023-05-12 05:09:51,846] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler\n",
            "[2023-05-12 05:09:51,846] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f7fe71ff8e0>\n",
            "[2023-05-12 05:09:51,846] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0], mom=[(0.9, 0.999)]\n",
            "[2023-05-12 05:09:51,848] [INFO] [config.py:955:print] DeepSpeedEngine configuration:\n",
            "[2023-05-12 05:09:51,848] [INFO] [config.py:959:print]   activation_checkpointing_config  {\n",
            "    \"partition_activations\": false, \n",
            "    \"contiguous_memory_optimization\": false, \n",
            "    \"cpu_checkpointing\": false, \n",
            "    \"number_checkpoints\": null, \n",
            "    \"synchronize_checkpoint_boundary\": false, \n",
            "    \"profile\": false\n",
            "}\n",
            "[2023-05-12 05:09:51,848] [INFO] [config.py:959:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   amp_enabled .................. False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   amp_params ................... False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   autotuning_config ............ {\n",
            "    \"enabled\": false, \n",
            "    \"start_step\": null, \n",
            "    \"end_step\": null, \n",
            "    \"metric_path\": null, \n",
            "    \"arg_mappings\": null, \n",
            "    \"metric\": \"throughput\", \n",
            "    \"model_info\": null, \n",
            "    \"results_dir\": \"autotuning_results\", \n",
            "    \"exps_dir\": \"autotuning_exps\", \n",
            "    \"overwrite\": true, \n",
            "    \"fast\": true, \n",
            "    \"start_profile_step\": 3, \n",
            "    \"end_profile_step\": 5, \n",
            "    \"tuner_type\": \"gridsearch\", \n",
            "    \"tuner_early_stopping\": 5, \n",
            "    \"tuner_num_trials\": 50, \n",
            "    \"model_info_path\": null, \n",
            "    \"mp_size\": 1, \n",
            "    \"max_train_batch_size\": null, \n",
            "    \"min_train_batch_size\": 1, \n",
            "    \"max_train_micro_batch_size_per_gpu\": 1.024000e+03, \n",
            "    \"min_train_micro_batch_size_per_gpu\": 1, \n",
            "    \"num_tuning_micro_batch_sizes\": 3\n",
            "}\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   bfloat16_enabled ............. False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   checkpoint_parallel_write_pipeline  False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   checkpoint_tag_validation_enabled  True\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   checkpoint_tag_validation_fail  False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f7fe71ffc40>\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   communication_data_type ...... None\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   curriculum_enabled_legacy .... False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   curriculum_params_legacy ..... False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   data_efficiency_enabled ...... False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   dataloader_drop_last ......... False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   disable_allgather ............ False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   dump_state ................... False\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1e-10}\n",
            "[2023-05-12 05:09:51,849] [INFO] [config.py:959:print]   eigenvalue_enabled ........... False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_gas_boundary_resolution  1\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_layer_name ........ bert.encoder.layer\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_layer_num ......... 0\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_max_iter .......... 100\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_stability ......... 1e-06\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_tol ............... 0.01\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   eigenvalue_verbose ........... False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   elasticity_enabled ........... False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   flops_profiler_config ........ {\n",
            "    \"enabled\": false, \n",
            "    \"profile_step\": 1, \n",
            "    \"module_depth\": -1, \n",
            "    \"top_modules\": 1, \n",
            "    \"detailed\": true, \n",
            "    \"output_file\": null\n",
            "}\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   fp16_auto_cast ............... False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   fp16_enabled ................. True\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   fp16_master_weights_and_gradients  False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   global_rank .................. 0\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   grad_accum_dtype ............. None\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   gradient_accumulation_steps .. 1\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   gradient_clipping ............ 1.0\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   gradient_predivide_factor .... 1.0\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   initial_dynamic_scale ........ 65536\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   load_universal_checkpoint .... False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   loss_scale ................... 0\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   memory_breakdown ............. False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   mics_hierarchial_params_gather  False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   mics_shard_size .............. -1\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   nebula_config ................ {\n",
            "    \"enabled\": false, \n",
            "    \"persistent_storage_path\": null, \n",
            "    \"persistent_time_interval\": 100, \n",
            "    \"num_of_version_in_retention\": 2, \n",
            "    \"enable_nebula_load\": true, \n",
            "    \"load_path\": null\n",
            "}\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   optimizer_legacy_fusion ...... False\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   optimizer_name ............... None\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   optimizer_params ............. None\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}\n",
            "[2023-05-12 05:09:51,850] [INFO] [config.py:959:print]   pld_enabled .................. False\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   pld_params ................... False\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   prescale_gradients ........... False\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   scheduler_name ............... None\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   scheduler_params ............. None\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   sparse_attention ............. None\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   sparse_gradients_enabled ..... False\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   steps_per_print .............. 2000\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   train_batch_size ............. 1\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   train_micro_batch_size_per_gpu  1\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   use_node_local_storage ....... False\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   wall_clock_breakdown ......... False\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   world_size ................... 1\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   zero_allow_untested_optimizer  True\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=100000000 allgather_partitions=True allgather_bucket_size=100000000 overlap_comm=True load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False mics_shard_size=-1 mics_hierarchical_params_gather=False memory_efficient_linear=True\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   zero_enabled ................. True\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   zero_force_ds_cpu_optimizer .. True\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:959:print]   zero_optimization_stage ...... 2\n",
            "[2023-05-12 05:09:51,851] [INFO] [config.py:945:print_user_config]   json = {\n",
            "    \"fp16\": {\n",
            "        \"enabled\": true, \n",
            "        \"loss_scale\": 0, \n",
            "        \"loss_scale_window\": 100, \n",
            "        \"initial_scale_power\": 16, \n",
            "        \"hysteresis\": 2, \n",
            "        \"min_loss_scale\": 1e-10\n",
            "    }, \n",
            "    \"zero_optimization\": {\n",
            "        \"stage\": 2, \n",
            "        \"allgather_partitions\": true, \n",
            "        \"allgather_bucket_size\": 1.000000e+08, \n",
            "        \"overlap_comm\": true, \n",
            "        \"reduce_scatter\": true, \n",
            "        \"reduce_bucket_size\": 1.000000e+08, \n",
            "        \"contiguous_gradients\": true\n",
            "    }, \n",
            "    \"gradient_accumulation_steps\": 1, \n",
            "    \"gradient_clipping\": 1.0, \n",
            "    \"steps_per_print\": 2.000000e+03, \n",
            "    \"train_batch_size\": 1, \n",
            "    \"train_micro_batch_size_per_gpu\": 1, \n",
            "    \"wall_clock_breakdown\": false, \n",
            "    \"zero_allow_untested_optimizer\": true\n",
            "}\n",
            "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n",
            "No modifications detected for re-loaded extension module utils, skipping build step...\n",
            "Loading extension module utils...\n",
            "Time to load utils op: 0.0003616809844970703 seconds\n",
            "[INFO|trainer.py:1769] 2023-05-12 05:09:51,853 >> ***** Running training *****\n",
            "[INFO|trainer.py:1770] 2023-05-12 05:09:51,853 >>   Num examples = 51,179\n",
            "[INFO|trainer.py:1771] 2023-05-12 05:09:51,853 >>   Num Epochs = 1\n",
            "[INFO|trainer.py:1772] 2023-05-12 05:09:51,854 >>   Instantaneous batch size per device = 1\n",
            "[INFO|trainer.py:1773] 2023-05-12 05:09:51,854 >>   Total train batch size (w. parallel, distributed & accumulation) = 1\n",
            "[INFO|trainer.py:1774] 2023-05-12 05:09:51,854 >>   Gradient Accumulation steps = 1\n",
            "[INFO|trainer.py:1775] 2023-05-12 05:09:51,854 >>   Total optimization steps = 100\n",
            "[INFO|trainer.py:1776] 2023-05-12 05:09:51,857 >>   Number of trainable parameters = 429,211,648\n",
            "  0% 0/100 [00:00<?, ?it/s][WARNING|logging.py:295] 2023-05-12 05:09:51,880 >> `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\n",
            "[2023-05-12 05:09:53,218] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1\n",
            "{'loss': 1.8672, 'learning_rate': 0.0, 'epoch': 0.0}\n",
            "  1% 1/100 [00:01<02:13,  1.35s/it][2023-05-12 05:09:53,556] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768\n",
            "  8% 8/100 [00:04<00:41,  2.20it/s][2023-05-12 05:09:56,507] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384\n",
            "{'loss': 1.5694, 'learning_rate': 9.958100506132127e-05, 'epoch': 0.0}\n",
            " 15% 15/100 [00:07<00:35,  2.37it/s][2023-05-12 05:09:59,385] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192\n",
            "{'loss': 1.4827, 'learning_rate': 9.563325576007701e-05, 'epoch': 0.0}\n",
            "{'loss': 1.7555, 'learning_rate': 8.675731968536002e-05, 'epoch': 0.0}\n",
            "{'loss': 1.5479, 'learning_rate': 7.405929722454026e-05, 'epoch': 0.0}\n",
            "{'loss': 1.9587, 'learning_rate': 5.885954957896115e-05, 'epoch': 0.0}\n",
            " 50% 50/100 [00:22<00:21,  2.34it/s][INFO|trainer.py:2868] 2023-05-12 05:10:13,882 >> Saving model checkpoint to /content/output_model/checkpoint-50\n",
            "[INFO|trainer.py:2880] 2023-05-12 05:10:13,889 >> Trainer.model is not a `PreTrainedModel`, only saving its state dict.\n",
            "[INFO|tokenization_utils_base.py:2171] 2023-05-12 05:10:15,418 >> tokenizer config file saved in /content/output_model/checkpoint-50/tokenizer_config.json\n",
            "[INFO|tokenization_utils_base.py:2178] 2023-05-12 05:10:15,418 >> Special tokens file saved in /content/output_model/checkpoint-50/special_tokens_map.json\n",
            "[2023-05-12 05:10:15,420] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step50 is about to be saved!\n",
            "[2023-05-12 05:10:23,879] [INFO] [logging.py:96:log_dist] [Rank 0] Saving model checkpoint: /content/output_model/checkpoint-50/global_step50/mp_rank_00_model_states.pt\n",
            "[2023-05-12 05:10:23,879] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /content/output_model/checkpoint-50/global_step50/mp_rank_00_model_states.pt...\n",
            "[2023-05-12 05:11:02,126] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /content/output_model/checkpoint-50/global_step50/mp_rank_00_model_states.pt.\n",
            "[2023-05-12 05:11:02,639] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /content/output_model/checkpoint-50/global_step50/zero_pp_rank_0_mp_rank_00_optim_states.pt...\n",
            "[2023-05-12 05:11:17,041] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /content/output_model/checkpoint-50/global_step50/zero_pp_rank_0_mp_rank_00_optim_states.pt.\n",
            "[2023-05-12 05:11:17,041] [INFO] [engine.py:3228:_save_zero_checkpoint] zero checkpoint saved /content/output_model/checkpoint-50/global_step50/zero_pp_rank_0_mp_rank_00_optim_states.pt\n",
            "[2023-05-12 05:11:17,042] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step50 is ready now!\n",
            "{'loss': 2.2106, 'learning_rate': 4.27385714377255e-05, 'epoch': 0.0}\n",
            "{'loss': 1.7592, 'learning_rate': 2.737264854777306e-05, 'epoch': 0.0}\n",
            "{'loss': 1.0934, 'learning_rate': 1.4359554772658552e-05, 'epoch': 0.0}\n",
            "{'loss': 1.7842, 'learning_rate': 5.05241294573024e-06, 'epoch': 0.0}\n",
            "{'loss': 2.0819, 'learning_rate': 4.189949386787462e-07, 'epoch': 0.0}\n",
            "100% 100/100 [01:46<00:00,  2.35it/s][INFO|trainer.py:2868] 2023-05-12 05:11:38,410 >> Saving model checkpoint to /content/output_model/checkpoint-100\n",
            "[INFO|trainer.py:2880] 2023-05-12 05:11:38,416 >> Trainer.model is not a `PreTrainedModel`, only saving its state dict.\n",
            "[INFO|tokenization_utils_base.py:2171] 2023-05-12 05:11:39,937 >> tokenizer config file saved in /content/output_model/checkpoint-100/tokenizer_config.json\n",
            "[INFO|tokenization_utils_base.py:2178] 2023-05-12 05:11:39,937 >> Special tokens file saved in /content/output_model/checkpoint-100/special_tokens_map.json\n",
            "[2023-05-12 05:11:39,939] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step100 is about to be saved!\n",
            "[2023-05-12 05:11:48,459] [INFO] [logging.py:96:log_dist] [Rank 0] Saving model checkpoint: /content/output_model/checkpoint-100/global_step100/mp_rank_00_model_states.pt\n",
            "[2023-05-12 05:11:48,459] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /content/output_model/checkpoint-100/global_step100/mp_rank_00_model_states.pt...\n",
            "[2023-05-12 05:12:27,009] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /content/output_model/checkpoint-100/global_step100/mp_rank_00_model_states.pt.\n",
            "[2023-05-12 05:12:27,603] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /content/output_model/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt...\n",
            "[2023-05-12 05:12:41,962] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /content/output_model/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt.\n",
            "[2023-05-12 05:12:41,962] [INFO] [engine.py:3228:_save_zero_checkpoint] zero checkpoint saved /content/output_model/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt\n",
            "[2023-05-12 05:12:41,962] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step100 is ready now!\n",
            "[INFO|trainer.py:2039] 2023-05-12 05:12:41,965 >> \n",
            "\n",
            "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
            "\n",
            "\n",
            "{'train_runtime': 170.1089, 'train_samples_per_second': 0.588, 'train_steps_per_second': 0.588, 'train_loss': 1.72732666015625, 'epoch': 0.0}\n",
            "100% 100/100 [02:50<00:00,  1.70s/it]\n",
            "[INFO|trainer.py:2868] 2023-05-12 05:12:42,261 >> Saving model checkpoint to /content/output_model\n",
            "[INFO|trainer.py:2880] 2023-05-12 05:12:42,271 >> Trainer.model is not a `PreTrainedModel`, only saving its state dict.\n",
            "[INFO|tokenization_utils_base.py:2171] 2023-05-12 05:12:44,658 >> tokenizer config file saved in /content/output_model/tokenizer_config.json\n",
            "[INFO|tokenization_utils_base.py:2178] 2023-05-12 05:12:44,658 >> Special tokens file saved in /content/output_model/special_tokens_map.json\n",
            "***** train metrics *****\n",
            "  epoch                    =        0.0\n",
            "  train_loss               =     1.7273\n",
            "  train_runtime            = 0:02:50.10\n",
            "  train_samples            =      51179\n",
            "  train_samples_per_second =      0.588\n",
            "  train_steps_per_second   =      0.588\n"
          ]
        }
      ],
      "source": [
        "!cd Chinese-LLaMA-Alpaca/scripts && torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \\\n",
        "    --deepspeed ds_zero2_no_offload.json \\\n",
        "    --model_name_or_path decapoda-research/llama-7b-hf \\\n",
        "    --tokenizer_name_or_path ziqingyang/chinese-alpaca-lora-7b \\\n",
        "    --dataset_dir /content/Chinese-LLaMA-Alpaca/sft_data \\\n",
        "    --validation_split_percentage 0.001 \\\n",
        "    --per_device_train_batch_size 1 \\\n",
        "    --do_train \\\n",
        "    --fp16 \\\n",
        "    --seed $RANDOM \\\n",
        "    --max_steps 100 \\\n",
        "    --lr_scheduler_type cosine \\\n",
        "    --learning_rate 1e-4 \\\n",
        "    --warmup_ratio 0.03 \\\n",
        "    --weight_decay 0 \\\n",
        "    --logging_strategy steps \\\n",
        "    --logging_steps 10 \\\n",
        "    --save_strategy steps \\\n",
        "    --save_total_limit 3 \\\n",
        "    --save_steps 50 \\\n",
        "    --gradient_accumulation_steps 1 \\\n",
        "    --preprocessing_num_workers 8 \\\n",
        "    --max_seq_length 512 \\\n",
        "    --output_dir /content/output_model \\\n",
        "    --overwrite_output_dir \\\n",
        "    --ddp_timeout 30000 \\\n",
        "    --logging_first_step True \\\n",
        "    --torch_dtype float16 \\\n",
        "    --peft_path ziqingyang/chinese-alpaca-lora-7b \\\n",
        "    --gradient_checkpointing \\\n",
        "    --ddp_find_unused_parameters False"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "hvVWDy9YPzG1"
      },
      "source": [
        "After training, rename saved `pytorch_model.bin` to `adapter_model.bin`"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KnA4qnBCX3ev"
      },
      "outputs": [],
      "source": [
        "!mkdir output_model/peft_model\n",
        "!mv output_model/pytorch_model.bin output_model/peft_model/adapter_model.bin"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "hDRJlD8sYs7E"
      },
      "source": [
        "Lastly, you need to manually create an `adapter_config.json` under `peft_model` and fill in the hyperparamters such as `lora_rank`, `lora_alpha` etc., whose content and \n",
        "format can be referenced from the corresponding file in Chinese-Alpaca-LoRA."
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "A100",
      "machine_shape": "hm",
      "provenance": []
    },
    "gpuClass": "standard",
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
